import sqlite3
import urllib2
import sys
import time

from bs4 import BeautifulSoup

# Python 2 hack: `reload(sys)` restores the `setdefaultencoding` attribute
# that the interpreter deletes at startup, so implicit str<->unicode
# conversions default to UTF-8 instead of ASCII.
# NOTE(review): this is widely discouraged — it masks encoding bugs globally.
reload(sys)
sys.setdefaultencoding('utf-8')
# Module-level SQLite connection (opened as an import side effect).
# NOTE(review): `con` is never used anywhere in this file — either dead code
# or intended for a persistence step that was never written; confirm.
con = sqlite3.connect("qiushibaike.db")

def download(url):
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
            'Accept': '*/*'
        }
        print 'begin download:', url
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        html = response.read()
    except urllib2.URLError as e:
        print 'download error:', url
        html = None
    except Exception as e:
        print 'download error:', url
        html = None
    time.sleep(3)
    return html


def get_all_list(page=1):
    url = 'https://www.qiushibaike.com/hot/page/%s/' % (str(page))
    response = download(url)
    soup = BeautifulSoup(response)
    content_href_list=['https://www.qiushibaike.com'+item.attrs['href'] for item in soup.find_all('a',attrs={'class':'contentHerf'})]
    get_detail_content(content_href_list[0])



def get_detail_content(url):
    response = download(url)
    soup = BeautifulSoup(response)
    content = soup.find('div',attrs={'class':'content'})
    file=open('qiushibaike.html', mode='w')
    # file.write(content.text)
    file.write(soup.prettify())
    file.close()



if __name__ == '__main__':
    # Entry point: scrape page 1 of the hot list.  (Removed stale
    # commented-out argv handling that referenced a nonexistent
    # `get_all_books_by_tag` helper.)
    get_all_list()
